Undisciplined Art - Chance

Sound of Sight

Lauren Hu

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import librosa as lb
from scipy.signal import stft, istft
import os.path
import IPython.display as ipd
In [3]:
def plti(im, h=5, **kwargs):
    """Display an image without axes in a figure matching its aspect ratio.

    Adapted from
    http://www.degeneratestate.org/posts/2016/Oct/23/image-processing-with-numpy/

    Parameters
    ----------
    im : array with shape (rows, cols) or (rows, cols, channels)
        Image data handed to ``plt.imshow``.
    h : float, optional
        Figure height in inches; the width is derived from it.
    **kwargs
        Forwarded unchanged to ``plt.imshow``.
    """
    rows = im.shape[0]
    cols = im.shape[1]
    # figsize is (width, height), so the width has to grow with cols/rows.
    # Using rows/cols here would give a landscape image a portrait figure.
    w = (cols / rows) * h
    plt.figure(figsize=(w, h))
    plt.imshow(im, interpolation="none", **kwargs)
    plt.axis('off')
In [4]:
def to_grayscale(im, weights = np.c_[0.2989, 0.5870, 0.1140]):
    """Convert a colour image to greyscale as a weighted sum of its channels.

    Adapted from
    http://www.degeneratestate.org/posts/2016/Oct/23/image-processing-with-numpy/

    Parameters
    ----------
    im : array with shape (rows, cols, channels) or (rows, cols)
        Image data. Channels beyond the number of weights (e.g. the alpha
        channel of an RGBA PNG) are ignored. A 2-D array is treated as
        already greyscale.
    weights : array, optional
        One weight per colour channel; defaults to the R, G, B luma weights.

    Returns
    -------
    Float array with shape (rows, cols).
    """
    im = np.asarray(im)
    if im.ndim == 2:
        # Already single-channel: nothing to combine.
        return im.astype(float)
    w = np.ravel(weights)
    # Drop any extra channels (alpha) so the shapes line up, then let
    # broadcasting apply the weights instead of tiling a full-size copy.
    channels = im[..., :w.size]
    return np.sum(channels * w, axis=2)
In [5]:
def imageToAudio(image, rate=2*22050):
    '''Display an image and turn it into sound by treating it as a spectrogram.

    image [input] path of the image file, as a string
    rate [input] playback sample rate in Hz (defaults to 44100)
    [output] IPython.display.Audio player for the synthesised signal
    '''
    im = plt.imread(image)
    plti(im)
    img = to_grayscale(im)
    # Row 0 of an image is its top edge, whereas istft reads row 0 as the
    # lowest frequency bin; flip so the top of the picture is the highest pitch.
    img = np.flip(img, axis=0)
    # Rows are frequency bins and columns are time frames; pixel brightness
    # is used directly as the STFT value.
    _, x = istft(img)
    return ipd.Audio(x, rate=rate)
In [6]:
# Load the reference track; lb.load returns the samples and their sample rate.
shire, sr_shire = lb.load('TheShire.mp3')
# Reuse the rate librosa reports (22050 Hz with its default settings) instead
# of repeating the number by hand, so the two can never drift apart.
sr = sr_shire
# Number of samples in the first tenth of the track, used for the audio preview.
ytime = len(shire)//10
In [7]:
# STFT settings: N-sample windows advanced by N/8 samples, i.e. 7/8 overlap.
# hop now states the step actually used (it was 1024, which did not match
# the 7*N/8 overlap passed to stft), and noverlap is an exact integer.
N = 2048; hop = N // 8
f, t, S = stft(shire, sr, nperseg=N, noverlap=N - hop)
# Spacing between frequency bins in Hz, and the upper edge of the lowest
# maxN bins, which is the frequency range shown in the spectrogram.
freqstep = sr / N; maxN = 200; freqmax = maxN * freqstep
# One fifth of the number of STFT frames.
a = len(t) / 5

Audio clip and spectrogram of a real song:

In [8]:
# Audio player for the opening ytime samples of the track.
ipd.Audio(shire[:ytime], rate=sr)
Out[8]:
In [9]:
# Magnitude spectrogram of the lowest maxN frequency bins of the track.
plt.figure(figsize=(18,10)); plt.imshow(np.abs(S[0:maxN,:]), cmap='inferno',vmin=0, vmax=0.01, origin='lower', extent=(0,S.shape[1],0,freqmax))
# The x axis is measured in STFT frames. Place a tick every 30 seconds using
# the real frame spacing from t, so the labels stay correct whatever the
# length of the recording (they used to assume exactly 2:30).
frame_dt = t[1] - t[0]
tick_sec = np.arange(0, t[-1] + frame_dt, 30)
tick_labels = ['%d:%02d' % divmod(int(s), 60) for s in tick_sec]
plt.xticks(tick_sec / frame_dt, tick_labels)
plt.xlabel('Time [minutes]'); plt.ylabel('Frequency [Hz]'); plt.title('Spectrogram of Shire'); plt.show()

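But why not start from a picture to create audio? Below, each image is converted to grayscale and treated as a spectrogram (rows are frequency bins, columns are time frames), which an inverse STFT then turns into sound.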

In [10]:
# Show diagonal2.png and return an audio player for its sonification.
imageToAudio('diagonal2.png')
Out[10]:
In [11]:
# Show derpcorn.png and return an audio player for its sonification.
imageToAudio('derpcorn.png')
Out[11]:
In [12]:
# Show banksky.jpg and return an audio player for its sonification.
imageToAudio('banksky.jpg')
# Reference for the image:
# https://www.sothebys.com/en/articles/sothebys-gets-banksyed-at-contemporary-art-auction-in-london
Out[12]:
In [13]:
# Show trump.jpg and return an audio player for its sonification.
imageToAudio('trump.jpg')
Out[13]:
In [14]:
# Show monalisa.jpg and return an audio player for its sonification.
imageToAudio('monalisa.jpg')
Out[14]: